Import the dataset and libraries; check datatypes, statistical summary, shape, and null values
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
#Importing libraries
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
I have used the Bank Personal Loan Modelling dataset.
The dataset is a CSV file.
The pandas read_csv function is used to read the data.
#Importing the dataset
df = pd.read_csv("/content/drive/MyDrive/Project - YHills - Personal Loan Modelling/dataset/Bank_Personal_Loan_Modelling.csv")
There are 12 features plus the Personal Loan target (and an ID column).
The features are detailed below:
Feature | Description |
---|---|
Age | Customer's age |
Experience | Number of years of professional experience |
Income | Annual income of the customer |
ZIP Code | Home address ZIP code |
Family | Family size of the customer |
CCAvg | Average spending on credit cards per month |
Education | Education Level: 1: Undergrad 2: Graduate 3: Advanced/Professional |
Mortgage | Value of house mortgage (if any) |
Securities Account | Does the customer have a securities account with the bank? |
CD Account | Does the customer have a certificate of deposit (CD) account with the bank? |
Online | Does the customer use internet banking facilities? |
CreditCard | Does the customer use a credit card issued by UniversalBank? |
Personal Loan | Did this customer accept the personal loan offered in the last campaign? |
#To display top 5 rows
df.head()
ID | Age | Experience | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
#To display bottom 5 rows
df.tail()
ID | Age | Experience | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4995 | 4996 | 29 | 3 | 40 | 92697 | 1 | 1.9 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
4996 | 4997 | 30 | 4 | 15 | 92037 | 4 | 0.4 | 1 | 85 | 0 | 0 | 0 | 1 | 0 |
4997 | 4998 | 63 | 39 | 24 | 93023 | 2 | 0.3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
4998 | 4999 | 65 | 40 | 49 | 90034 | 3 | 0.5 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
4999 | 5000 | 28 | 4 | 83 | 92612 | 3 | 0.8 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
df.head(10).style.background_gradient(cmap="PuBuGn")
ID | Age | Experience | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.600000 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.500000 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.000000 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.700000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.000000 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
5 | 6 | 37 | 13 | 29 | 92121 | 4 | 0.400000 | 2 | 155 | 0 | 0 | 0 | 1 | 0 |
6 | 7 | 53 | 27 | 72 | 91711 | 2 | 1.500000 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
7 | 8 | 50 | 24 | 22 | 93943 | 1 | 0.300000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
8 | 9 | 35 | 10 | 81 | 90089 | 3 | 0.600000 | 2 | 104 | 0 | 0 | 0 | 1 | 0 |
9 | 10 | 34 | 9 | 180 | 93023 | 1 | 8.900000 | 3 | 0 | 1 | 0 | 0 | 0 | 0 |
# To find the dtype of each column in the DataFrame
df.dtypes
ID                      int64
Age                     int64
Experience              int64
Income                  int64
ZIP Code                int64
Family                  int64
CCAvg                 float64
Education               int64
Mortgage                int64
Personal Loan           int64
Securities Account      int64
CD Account              int64
Online                  int64
CreditCard              int64
dtype: object
df.describe()
ID | Age | Experience | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.000000 | 5000.00000 | 5000.000000 | 5000.000000 |
mean | 2500.500000 | 45.338400 | 20.104600 | 73.774200 | 93152.503000 | 2.396400 | 1.937938 | 1.881000 | 56.498800 | 0.096000 | 0.104400 | 0.06040 | 0.596800 | 0.294000 |
std | 1443.520003 | 11.463166 | 11.467954 | 46.033729 | 2121.852197 | 1.147663 | 1.747659 | 0.839869 | 101.713802 | 0.294621 | 0.305809 | 0.23825 | 0.490589 | 0.455637 |
min | 1.000000 | 23.000000 | -3.000000 | 8.000000 | 9307.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 |
25% | 1250.750000 | 35.000000 | 10.000000 | 39.000000 | 91911.000000 | 1.000000 | 0.700000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 |
50% | 2500.500000 | 45.000000 | 20.000000 | 64.000000 | 93437.000000 | 2.000000 | 1.500000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 1.000000 | 0.000000 |
75% | 3750.250000 | 55.000000 | 30.000000 | 98.000000 | 94608.000000 | 3.000000 | 2.500000 | 3.000000 | 101.000000 | 0.000000 | 0.000000 | 0.00000 | 1.000000 | 1.000000 |
max | 5000.000000 | 67.000000 | 43.000000 | 224.000000 | 96651.000000 | 4.000000 | 10.000000 | 3.000000 | 635.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 |
# Transpose of df.describe()
df.describe().T
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
ID | 5000.0 | 2500.500000 | 1443.520003 | 1.0 | 1250.75 | 2500.5 | 3750.25 | 5000.0 |
Age | 5000.0 | 45.338400 | 11.463166 | 23.0 | 35.00 | 45.0 | 55.00 | 67.0 |
Experience | 5000.0 | 20.104600 | 11.467954 | -3.0 | 10.00 | 20.0 | 30.00 | 43.0 |
Income | 5000.0 | 73.774200 | 46.033729 | 8.0 | 39.00 | 64.0 | 98.00 | 224.0 |
ZIP Code | 5000.0 | 93152.503000 | 2121.852197 | 9307.0 | 91911.00 | 93437.0 | 94608.00 | 96651.0 |
Family | 5000.0 | 2.396400 | 1.147663 | 1.0 | 1.00 | 2.0 | 3.00 | 4.0 |
CCAvg | 5000.0 | 1.937938 | 1.747659 | 0.0 | 0.70 | 1.5 | 2.50 | 10.0 |
Education | 5000.0 | 1.881000 | 0.839869 | 1.0 | 1.00 | 2.0 | 3.00 | 3.0 |
Mortgage | 5000.0 | 56.498800 | 101.713802 | 0.0 | 0.00 | 0.0 | 101.00 | 635.0 |
Personal Loan | 5000.0 | 0.096000 | 0.294621 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
Securities Account | 5000.0 | 0.104400 | 0.305809 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
CD Account | 5000.0 | 0.060400 | 0.238250 | 0.0 | 0.00 | 0.0 | 0.00 | 1.0 |
Online | 5000.0 | 0.596800 | 0.490589 | 0.0 | 0.00 | 1.0 | 1.00 | 1.0 |
CreditCard | 5000.0 | 0.294000 | 0.455637 | 0.0 | 0.00 | 0.0 | 1.00 | 1.0 |
We can observe that Experience has some negative values, which are not physically meaningful.
# To check the Dimensionality of the DataFrame
df.shape
(5000, 14)
# To check the total null values
df.isnull().sum()
ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64
df.nunique()
ID                    5000
Age                     45
Experience              47
Income                 162
ZIP Code               467
Family                   4
CCAvg                  108
Education                3
Mortgage               347
Personal Loan            2
Securities Account       2
CD Account               2
Online                   2
CreditCard               2
dtype: int64
ZIP Code has 467 distinct values. It is a nominal, high-cardinality variable and will not help the prediction, so we will drop the ZIP Code column.
# Drop the ZIP Code column
# (note: without inplace=True or reassignment, this only returns a new frame)
df.drop(['ZIP Code'], axis = 1)
ID | Age | Experience | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 25 | 1 | 49 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 2 | 45 | 19 | 34 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 3 | 39 | 15 | 11 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 4 | 35 | 9 | 100 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 5 | 35 | 8 | 45 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4995 | 4996 | 29 | 3 | 40 | 1 | 1.9 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
4996 | 4997 | 30 | 4 | 15 | 4 | 0.4 | 1 | 85 | 0 | 0 | 0 | 1 | 0 |
4997 | 4998 | 63 | 39 | 24 | 2 | 0.3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
4998 | 4999 | 65 | 40 | 49 | 3 | 0.5 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
4999 | 5000 | 28 | 4 | 83 | 3 | 0.8 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
5000 rows × 13 columns
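Note: df.drop(['ZIP Code'], axis=1) above returns a new DataFrame without modifying df, which is why ZIP Code still appears in the cells that follow. A minimal sketch of how the drop could be persisted, if desired:
# Reassign the result (or pass inplace=True) so the drop actually sticks
df = df.drop(['ZIP Code'], axis = 1)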
df[df['Mortgage'] == 0]['Mortgage'].value_counts()
0    3462
Name: Mortgage, dtype: int64
df[df['CCAvg'] == 0]['CCAvg'].value_counts()
0.0    106
Name: CCAvg, dtype: int64
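An equivalent, more direct way to get the same zero counts is a boolean mask (a small sketch over the two columns above):
# Count zero entries per column; should report 3462 for Mortgage and 106 for CCAvg
(df[['Mortgage', 'CCAvg']] == 0).sum()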
df['Family'].value_counts()
1    1472
2    1296
4    1222
3    1010
Name: Family, dtype: int64
df['Securities Account'].value_counts()
0    4478
1     522
Name: Securities Account, dtype: int64
df['CD Account'].value_counts()
0    4698
1     302
Name: CD Account, dtype: int64
df['CreditCard'].value_counts()
0    3530
1    1470
Name: CreditCard, dtype: int64
df['Education'].value_counts()
1    2096
3    1501
2    1403
Name: Education, dtype: int64
# To check internet banking usage counts
df['Online'].value_counts()
1    2984
0    2016
Name: Online, dtype: int64
# Bar plot of the 25 most common ages, annotated with counts
age = df['Age'].value_counts().head(25)
ax = age.plot.bar(width=.9, color="Green")
plt.title("Age", size=20)
plt.xlabel("Age")
plt.ylabel("Count")
for i, v in age.reset_index().iterrows():
    ax.text(i, v.Age + 1.5, v.Age, color='green', rotation=90)  # v.Age holds the count
# Bar plot of the 25 most common experience values, annotated with counts
experience = df['Experience'].value_counts().head(25)
ax = experience.plot.bar(width=.9, color="Purple")
plt.title("Experience", size=20)
plt.xlabel("Experience")
plt.ylabel("Count")
for i, v in experience.reset_index().iterrows():
    ax.text(i, v.Experience + 1.5, v.Experience, color='Purple', rotation=90)
# Bar plot of the 25 most common income values, annotated with counts
Income = df['Income'].value_counts().head(25)
ax = Income.plot.bar(width=.9, color="Indigo")
plt.title("Income", size=20)
plt.xlabel("Income in Dollar")
plt.ylabel("Count")
for i, v in Income.reset_index().iterrows():
    ax.text(i, v.Income + 1.5, v.Income, color='Indigo', rotation=90)
maxIncome = df.loc[(df['Personal Loan']==1),'Income'].max()
minIncome = df.loc[(df['Personal Loan']==1),'Income'].min()
print(maxIncome)
print(minIncome)
203
60
sns.scatterplot(x="Personal Loan", y="Income", data=df, hue="Personal Loan")
<matplotlib.axes._subplots.AxesSubplot at 0x7f032775a190>
sns.barplot(x="Age", y="Experience", data=df, ci=None)
<matplotlib.axes._subplots.AxesSubplot at 0x7f03278d0090>
sns.scatterplot(x="Experience", y="Income", data=df, color='black')
<matplotlib.axes._subplots.AxesSubplot at 0x7f032712d950>
sns.scatterplot(x="Income", y="Mortgage", hue="Mortgage", data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f0326f9cf50>
sns.stripplot(y="Mortgage", x="Personal Loan", data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f0328eb1550>
sns.relplot(x="Education", y="Mortgage", data=df, hue="Education")
<seaborn.axisgrid.FacetGrid at 0x7f032562f7d0>
sns.relplot(x="Mortgage", y="Family", data=df, hue="Family")
<seaborn.axisgrid.FacetGrid at 0x7f0325657e50>
# Counts of personal-loan acceptors by family size
family = df.Family[df['Personal Loan']==1].value_counts().sort_index()
ax = family.plot.bar(width=.9, color="Gray")
plt.title("Family VS Personal Loan", size=20)
plt.xlabel("Family")
plt.ylabel("Count of taken Personal Loan")
for i, v in family.reset_index().iterrows():
    ax.text(i, v.Family + 1.5, v.Family, color='Indigo')  # v.Family holds the count
df.isnull().sum()
ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64
df.lt(0).any()
ID                    False
Age                   False
Experience             True
Income                False
ZIP Code              False
Family                False
CCAvg                 False
Education             False
Mortgage              False
Personal Loan         False
Securities Account    False
CD Account            False
Online                False
CreditCard            False
dtype: bool
The ID variable does not add any useful information: there is no association between a customer's ID and the loan decision, and it offers no generalization to future potential loan customers. We can drop it before model prediction.
# To check the counts of negative values in experience column
df[df['Experience'] < 0]['Experience'].count()
52
# To check the distribution of the negative values
df[df['Experience'] < 0]['Experience'].value_counts()
-1    33
-2    15
-3     4
Name: Experience, dtype: int64
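The negatives are small (-1 to -3) and look like data-entry sign errors. We drop the column below, but an alternative sketch, not used here, would be to repair the values instead:
# Option 1: treat the negatives as sign errors and take absolute values
exp_abs = df['Experience'].abs()
# Option 2: floor the negatives at zero
exp_clipped = df['Experience'].clip(lower=0)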
Since Experience and Age are highly correlated, we drop the Experience column.
# Dropping the ID and Experience column
df.drop(['ID','Experience'],axis=1,inplace=True)
#To display top 5 rows
df.head()
Age | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 45 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 39 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 35 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 35 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
#To display bottom 5 rows
df.tail()
Age | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
4995 | 29 | 40 | 92697 | 1 | 1.9 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
4996 | 30 | 15 | 92037 | 4 | 0.4 | 1 | 85 | 0 | 0 | 0 | 1 | 0 |
4997 | 63 | 24 | 93023 | 2 | 0.3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
4998 | 65 | 49 | 90034 | 3 | 0.5 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
4999 | 28 | 83 | 92612 | 3 | 0.8 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
#To check the names of each column
df.columns
Index(['Age', 'Income', 'ZIP Code', 'Family', 'CCAvg', 'Education', 'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account', 'Online', 'CreditCard'], dtype='object')
sns.distplot(df["Age"])
plt.show()
So, Age has an approximately normal distribution.
sns.distplot(df["Income"])
<matplotlib.axes._subplots.AxesSubplot at 0x7f03255e8110>
So, Income has a right-skewed distribution (long tail toward higher incomes).
sns.distplot(df["Mortgage"])
<matplotlib.axes._subplots.AxesSubplot at 0x7f03255e8250>
So, Mortgage is highly right-skewed; most values are zero.
sns.distplot(df["CCAvg"])
<matplotlib.axes._subplots.AxesSubplot at 0x7f031cc5fd50>
So, CCAvg (average credit card spending) is also right-skewed.
We have to do some feature engineering on the Income, CCAvg and Mortgage variables, because heavily skewed inputs can degrade a logistic regression model.
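To quantify the skew before transforming, pandas' skew() gives the sample skewness of each column (positive values indicate a right/upper tail); a quick check might look like:
# Positive skewness confirms the right skew seen in the distplots above
df[['Income', 'CCAvg', 'Mortgage']].skew()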
# Count Plot to show Family Distributions
sns.countplot(x='Family',data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f031cbc79d0>
# Count Plot to show Education Distributions
sns.countplot(x='Education',data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f03291176d0>
# Count Plot to show Credit Card Distribution
sns.countplot(x='CreditCard',data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f031cb1c3d0>
# Count Plot to show Online Distributions
sns.countplot(x='Online',data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f031ca8cc90>
sns.boxplot(x='Education',y='Income',hue='Personal Loan',data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7f031c9ec4d0>
Observation: Customers with education level 1 appear to have higher incomes overall. However, among customers who took the personal loan, income levels look similar across education levels.
sns.countplot(x="Securities Account", data=df,hue="Personal Loan")
<matplotlib.axes._subplots.AxesSubplot at 0x7f031c9557d0>
Observation: Most customers who hold a securities account did not take the personal loan.
sns.countplot(x='Family',data=df,hue='Personal Loan')
<matplotlib.axes._subplots.AxesSubplot at 0x7f031c89aa50>
Observation: Family size does not have a strong overall impact on personal loans, but families of size 3 appear somewhat more likely to take the loan. This could be a useful association when planning future campaigns.
sns.countplot(x='CD Account',data=df,hue='Personal Loan')
<matplotlib.axes._subplots.AxesSubplot at 0x7f031c8405d0>
Observation: Most customers have neither a CD account nor a loan. Among the customers who do have a CD account, however, a much larger share also took the loan.
# Correlation heatmap: CCAvg (average credit card spending) and Income are highly correlated
fig, ax = plt.subplots(figsize=(12,10))
sns.heatmap(df.corr(), cmap='afmhot' , annot = True)
<matplotlib.axes._subplots.AxesSubplot at 0x7f03271bd310>
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x7f031cc6db90>
data_X = df.loc[:, df.columns != 'Personal Loan']
data_Y = df[['Personal Loan']]
data_X
Age | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Securities Account | CD Account | Online | CreditCard | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 1 | 0 | 0 | 0 |
1 | 45 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 1 | 0 | 0 | 0 |
2 | 39 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 |
3 | 35 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 |
4 | 35 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4995 | 29 | 40 | 92697 | 1 | 1.9 | 3 | 0 | 0 | 0 | 1 | 0 |
4996 | 30 | 15 | 92037 | 4 | 0.4 | 1 | 85 | 0 | 0 | 1 | 0 |
4997 | 63 | 24 | 93023 | 2 | 0.3 | 3 | 0 | 0 | 0 | 0 | 0 |
4998 | 65 | 49 | 90034 | 3 | 0.5 | 2 | 0 | 0 | 0 | 1 | 0 |
4999 | 28 | 83 | 92612 | 3 | 0.8 | 1 | 0 | 0 | 0 | 1 | 1 |
5000 rows × 11 columns
data_Y
Personal Loan | |
---|---|
0 | 0 |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
... | ... |
4995 | 0 |
4996 | 0 |
4997 | 0 |
4998 | 0 |
4999 | 0 |
5000 rows × 1 columns
from sklearn.preprocessing import PowerTransformer

# Yeo-Johnson power transform to reduce the skew in Income
pt = PowerTransformer(method='yeo-johnson', standardize=False)
pt.fit(data_X['Income'].values.reshape(-1,1))
temp = pt.transform(data_X['Income'].values.reshape(-1,1))
data_X['Income'] = pd.Series(temp.flatten())
# Distplot to show transformed Income variable
sns.distplot(data_X['Income'])
plt.show()
# Yeo-Johnson power transform to reduce the skew in CCAvg
pt = PowerTransformer(method='yeo-johnson', standardize=False)
pt.fit(data_X['CCAvg'].values.reshape(-1,1))
temp = pt.transform(data_X['CCAvg'].values.reshape(-1,1))
data_X['CCAvg'] = pd.Series(temp.flatten())
# Distplot to show transformed CCAvg variable
sns.distplot(data_X['CCAvg'])
plt.show()
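Since PowerTransformer fits a separate Yeo-Johnson lambda per feature, the two transformation cells above could also be collapsed into a single call. A minimal equivalent sketch (an alternative to, not a repeat of, the cells above, using the PowerTransformer already imported):
# One fit_transform call handles both skewed columns at once
pt = PowerTransformer(method='yeo-johnson', standardize=False)
data_X[['Income', 'CCAvg']] = pt.fit_transform(data_X[['Income', 'CCAvg']])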
# Bin the skewed Mortgage variable into 100-unit intervals (0 falls in the first bin)
data_X['Mortgage_Int'] = pd.cut(data_X['Mortgage'],
                                bins=[0,100,200,300,400,500,600,700],
                                labels=[0,1,2,3,4,5,6],
                                include_lowest=True)
data_X.drop('Mortgage', axis=1, inplace=True)
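A quick sanity check on the binning (a hypothetical follow-up cell): every mortgage value should land in exactly one labeled bin.
# Distribution of the binned variable; label 0 covers 0-100, label 1 covers 100-200, etc.
data_X['Mortgage_Int'].value_counts().sort_index()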
## 9.6% of all customers accepted the personal loan offered in the last campaign
tempDF = pd.DataFrame(df['Personal Loan'].value_counts()).reset_index()
tempDF.columns = ['Labels', 'Personal Loan']
fig1, ax1 = plt.subplots(figsize=(10,8))
explode = (0, 0.15)
ax1.pie(tempDF['Personal Loan'] , explode= explode, autopct= '%1.1f%%',
shadow=True , startangle = 70)
ax1.axis('equal')
plt.title('Personal Loan Percentage')
plt.show()
# To display top 5 rows
data_X.head()
Age | Income | ZIP Code | Family | CCAvg | Education | Securities Account | CD Account | Online | CreditCard | Mortgage_Int | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25 | 6.827583 | 91107 | 4 | 0.845160 | 1 | 1 | 0 | 0 | 0 | 0 |
1 | 45 | 5.876952 | 90089 | 3 | 0.814478 | 1 | 1 | 0 | 0 | 0 | 0 |
2 | 39 | 3.504287 | 94720 | 1 | 0.633777 | 1 | 0 | 0 | 0 | 0 | 0 |
3 | 35 | 8.983393 | 94112 | 1 | 1.107427 | 2 | 0 | 0 | 0 | 0 | 0 |
4 | 35 | 6.597314 | 91330 | 4 | 0.633777 | 2 | 0 | 0 | 0 | 1 | 0 |
from sklearn.model_selection import train_test_split
The dataset is split into training and testing sets in a 70:30 ratio.
We use the stratify parameter of train_test_split to keep the same class distribution across the train and test sets; a quick verification follows the split below.
# Splitting the data into train and test.
X_train,X_test,Y_train,Y_test = train_test_split(data_X,data_Y,test_size = 0.3, random_state = 0,stratify = data_Y)
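Because stratify=data_Y was passed, the positive-class rate (about 9.6%) should be nearly identical in both splits; a small verification sketch:
# Proportion of loan acceptors in each split; both should be close to 0.096
print(Y_train['Personal Loan'].mean())
print(Y_test['Personal Loan'].mean())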
X_train
Age | Income | ZIP Code | Family | CCAvg | Education | Securities Account | CD Account | Online | CreditCard | Mortgage_Int | |
---|---|---|---|---|---|---|---|---|---|---|---|
3789 | 51 | 5.058173 | 94301 | 3 | 0.322049 | 1 | 0 | 0 | 1 | 1 | 0 |
758 | 64 | 5.948841 | 90266 | 1 | 0.814478 | 2 | 1 | 0 | 0 | 0 | 0 |
2868 | 52 | 5.651776 | 94923 | 4 | 0.902279 | 1 | 0 | 0 | 1 | 1 | 0 |
2550 | 32 | 4.661500 | 93106 | 1 | 0.384645 | 3 | 0 | 0 | 1 | 0 | 1 |
2150 | 62 | 7.097040 | 91320 | 1 | 0.544710 | 1 | 1 | 0 | 0 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3597 | 56 | 6.937650 | 92028 | 3 | 0.954467 | 3 | 0 | 0 | 1 | 0 | 0 |
4670 | 52 | 11.394571 | 94305 | 1 | 0.874387 | 1 | 0 | 0 | 1 | 0 | 0 |
988 | 63 | 5.728502 | 94998 | 1 | 0.928941 | 2 | 0 | 0 | 0 | 0 | 0 |
2037 | 35 | 6.991517 | 95616 | 2 | 0.633777 | 2 | 0 | 0 | 0 | 1 | 0 |
2174 | 30 | 9.691160 | 95605 | 2 | 1.179285 | 1 | 0 | 0 | 1 | 0 | 0 |
3500 rows × 11 columns
X_test
Age | Income | ZIP Code | Family | CCAvg | Education | Securities Account | CD Account | Online | CreditCard | Mortgage_Int | |
---|---|---|---|---|---|---|---|---|---|---|---|
9 | 34 | 11.100150 | 93023 | 1 | 1.722825 | 3 | 0 | 0 | 0 | 0 | 0 |
461 | 55 | 8.302424 | 92123 | 2 | 1.271937 | 1 | 1 | 0 | 0 | 0 | 0 |
3700 | 48 | 9.831967 | 94608 | 1 | 1.497521 | 1 | 1 | 0 | 0 | 0 | 0 |
1559 | 59 | 9.049404 | 92677 | 4 | 1.162177 | 2 | 0 | 0 | 1 | 0 | 1 |
4558 | 44 | 8.341020 | 95521 | 2 | 0.322049 | 1 | 0 | 0 | 1 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2180 | 58 | 6.414718 | 91380 | 2 | 0.845160 | 3 | 0 | 0 | 1 | 0 | 0 |
3484 | 45 | 7.044639 | 92104 | 3 | 1.067713 | 2 | 0 | 0 | 0 | 0 | 1 |
2965 | 53 | 5.651776 | 91605 | 2 | 0.322049 | 3 | 0 | 0 | 0 | 1 | 1 |
2493 | 34 | 6.827583 | 94025 | 1 | 1.067713 | 3 | 0 | 0 | 0 | 0 | 0 |
3224 | 45 | 7.299875 | 94025 | 3 | 0.253539 | 3 | 1 | 0 | 1 | 0 | 0 |
1500 rows × 11 columns
Y_train
Personal Loan | |
---|---|
3789 | 0 |
758 | 0 |
2868 | 0 |
2550 | 0 |
2150 | 0 |
... | ... |
3597 | 0 |
4670 | 0 |
988 | 0 |
2037 | 0 |
2174 | 0 |
3500 rows × 1 columns
Y_test
Personal Loan | |
---|---|
9 | 1 |
461 | 0 |
3700 | 0 |
1559 | 1 |
4558 | 0 |
... | ... |
2180 | 0 |
3484 | 0 |
2965 | 0 |
2493 | 0 |
3224 | 0 |
1500 rows × 1 columns
X_train.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)
Y_train.reset_index(drop=True, inplace=True)
Y_test.reset_index(drop=True, inplace=True)
X_train
Age | Income | ZIP Code | Family | CCAvg | Education | Securities Account | CD Account | Online | CreditCard | Mortgage_Int | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 51 | 5.058173 | 94301 | 3 | 0.322049 | 1 | 0 | 0 | 1 | 1 | 0 |
1 | 64 | 5.948841 | 90266 | 1 | 0.814478 | 2 | 1 | 0 | 0 | 0 | 0 |
2 | 52 | 5.651776 | 94923 | 4 | 0.902279 | 1 | 0 | 0 | 1 | 1 | 0 |
3 | 32 | 4.661500 | 93106 | 1 | 0.384645 | 3 | 0 | 0 | 1 | 0 | 1 |
4 | 62 | 7.097040 | 91320 | 1 | 0.544710 | 1 | 1 | 0 | 0 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3495 | 56 | 6.937650 | 92028 | 3 | 0.954467 | 3 | 0 | 0 | 1 | 0 | 0 |
3496 | 52 | 11.394571 | 94305 | 1 | 0.874387 | 1 | 0 | 0 | 1 | 0 | 0 |
3497 | 63 | 5.728502 | 94998 | 1 | 0.928941 | 2 | 0 | 0 | 0 | 0 | 0 |
3498 | 35 | 6.991517 | 95616 | 2 | 0.633777 | 2 | 0 | 0 | 0 | 1 | 0 |
3499 | 30 | 9.691160 | 95605 | 2 | 1.179285 | 1 | 0 | 0 | 1 | 0 | 0 |
3500 rows × 11 columns
X_test
Age | Income | ZIP Code | Family | CCAvg | Education | Securities Account | CD Account | Online | CreditCard | Mortgage_Int | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 34 | 11.100150 | 93023 | 1 | 1.722825 | 3 | 0 | 0 | 0 | 0 | 0 |
1 | 55 | 8.302424 | 92123 | 2 | 1.271937 | 1 | 1 | 0 | 0 | 0 | 0 |
2 | 48 | 9.831967 | 94608 | 1 | 1.497521 | 1 | 1 | 0 | 0 | 0 | 0 |
3 | 59 | 9.049404 | 92677 | 4 | 1.162177 | 2 | 0 | 0 | 1 | 0 | 1 |
4 | 44 | 8.341020 | 95521 | 2 | 0.322049 | 1 | 0 | 0 | 1 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1495 | 58 | 6.414718 | 91380 | 2 | 0.845160 | 3 | 0 | 0 | 1 | 0 | 0 |
1496 | 45 | 7.044639 | 92104 | 3 | 1.067713 | 2 | 0 | 0 | 0 | 0 | 1 |
1497 | 53 | 5.651776 | 91605 | 2 | 0.322049 | 3 | 0 | 0 | 0 | 1 | 1 |
1498 | 34 | 6.827583 | 94025 | 1 | 1.067713 | 3 | 0 | 0 | 0 | 0 | 0 |
1499 | 45 | 7.299875 | 94025 | 3 | 0.253539 | 3 | 1 | 0 | 1 | 0 | 0 |
1500 rows × 11 columns
Y_train
Personal Loan | |
---|---|
0 | 0 |
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
... | ... |
3495 | 0 |
3496 | 0 |
3497 | 0 |
3498 | 0 |
3499 | 0 |
3500 rows × 1 columns
Y_test
Personal Loan | |
---|---|
0 | 1 |
1 | 0 |
2 | 0 |
3 | 1 |
4 | 0 |
... | ... |
1495 | 0 |
1496 | 0 |
1497 | 0 |
1498 | 0 |
1499 | 0 |
1500 rows × 1 columns
from sklearn.preprocessing import StandardScaler
Standardization of a dataset is a common requirement for many machine learning estimators: they might behave badly if the individual features do not more or less look like standard normally distributed data (e.g. Gaussian with 0 mean and unit variance).
We will apply the StandardScaler to the dataset to standardize the input variables
# Standardize each column: fit on the train data only, then apply to both sets
for ind, column in enumerate(X_train.columns):
    scaler = StandardScaler()
    # fit to train data
    scaler.fit(X_train[[column]])
    # transform train data
    np_array = scaler.transform(X_train[[column]])
    X_train.loc[:, column] = pd.Series(np_array.flatten())
    # transform test data
    np_array = scaler.transform(X_test[[column]])
    X_test.loc[:, column] = pd.Series(np_array.flatten())
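The per-column loop above is equivalent to fitting one StandardScaler on the whole training frame. A more compact sketch, assuming every column is numeric (the categorical Mortgage_Int would first need a cast such as .astype(int)):
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the train set only, then reuse its means/stds on the test set
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)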
from sklearn.linear_model import LogisticRegression
model_LR = LogisticRegression(random_state = 0)
model_LR.fit(X_train, Y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=100, multi_class='auto', n_jobs=None, penalty='l2', random_state=0, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)
from sklearn.metrics import confusion_matrix, recall_score , precision_score , f1_score , accuracy_score, roc_auc_score
X_test_predictions_model_LR = model_LR.predict(X_test)
X_test_predictions_model_LR
array([1, 0, 0, ..., 0, 0, 0])
# Accuracy of train data
model_LR.score(X_train, Y_train)
0.9568571428571429
# Accuracy of test data
model_LR.score(X_test,Y_test)
0.9546666666666667
# Defining the Confusion Matrix
def Confusion_Matrix(actual, predicted):
    cm = confusion_matrix(actual, predicted)
    fig, ax = plt.subplots(figsize=(8,6))
    # fmt='d' formats the integer counts; sns.heatmap manages its own axis limits
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=[0,1], yticklabels=[0,1])
    plt.ylabel('Observed')
    plt.xlabel('Predicted')
    plt.show()
Y_test.shape
(1500, 1)
print('Confusion Matrix')
Confusion_Matrix(Y_test, X_test_predictions_model_LR)
Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix_model_LR = confusion_matrix(Y_test, X_test_predictions_model_LR)
confusion_matrix_model_LR
array([[1338,   18],
       [  50,   94]])
from sklearn.metrics import classification_report
print(classification_report(Y_test,X_test_predictions_model_LR))
              precision    recall  f1-score   support

           0       0.96      0.99      0.98      1356
           1       0.84      0.65      0.73       144

    accuracy                           0.95      1500
   macro avg       0.90      0.82      0.85      1500
weighted avg       0.95      0.95      0.95      1500
print("Roc Auc Score: ", roc_auc_score(Y_test,X_test_predictions_model_LR))
Roc Auc Score: 0.819751720747296
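Note that the score above is computed from hard 0/1 predictions. Computed from the model's predicted probabilities, the ROC AUC is usually higher and more informative; a small sketch:
# ROC AUC from class-1 probabilities rather than hard labels
proba = model_LR.predict_proba(X_test)[:, 1]
print("Roc Auc Score (probabilities): ", roc_auc_score(Y_test, proba))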
For Logistic Regression we got about 95% accuracy on the test data, with an F1 score of 0.73 for the positive class. Now let's compare these values with other models.
Random forest is an ensemble machine learning algorithm.
It is perhaps the most popular and widely used machine learning algorithm given its good or excellent performance across a wide range of classification and regression predictive modeling problems.
It works in four steps:
1) Select random samples from the given dataset.
2) Construct a decision tree for each sample and get a prediction result from each decision tree.
3) Perform a vote over the predicted results.
4) Select the prediction result with the most votes as the final prediction.
from sklearn.ensemble import RandomForestClassifier
model_RF = RandomForestClassifier(n_estimators=500, max_depth=8)
model_RF.fit(X_train, Y_train)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=8, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=None, oob_score=False, random_state=None, verbose=0, warm_start=False)
X_test_predictions_model_RF = model_RF.predict(X_test)
X_test_predictions_model_RF
array([1, 0, 0, ..., 0, 0, 0])
# Accuracy of train data
model_RF.score(X_train, Y_train)
0.9945714285714286
# Accuracy of test data
model_RF.score(X_test,Y_test)
0.9873333333333333
print('Confusion Matrix')
Confusion_Matrix(Y_test, X_test_predictions_model_RF)
Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix_RF = confusion_matrix(Y_test, X_test_predictions_model_RF)
confusion_matrix_RF
array([[1354,    2],
       [  17,  127]])
from sklearn.metrics import classification_report
print(classification_report(Y_test,X_test_predictions_model_RF))
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1356
           1       0.98      0.88      0.93       144

    accuracy                           0.99      1500
   macro avg       0.99      0.94      0.96      1500
weighted avg       0.99      0.99      0.99      1500
print("ROC AUC Score: ", roc_auc_score(Y_test,X_test_predictions_model_RF))
ROC AUC Score: 0.9402347590953786
The ROC AUC score and F1 score are higher than those of the Logistic Regression model.
Decision Trees (DTs) are a non-parametric supervised learning method used for classification and regression. The goal is to create a model that predicts the value of a target variable by learning simple decision rules inferred from the data features.
from sklearn.tree import DecisionTreeClassifier
model_DT = DecisionTreeClassifier(random_state=0, max_depth=8)
model_DT.fit(X_train, Y_train)
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=8, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, presort='deprecated', random_state=0, splitter='best')
X_test_predictions_model_DT = model_DT.predict(X_test)
X_test_predictions_model_DT
array([1, 0, 0, ..., 0, 0, 0])
# Accuracy of train data
model_DT.score(X_train, Y_train)
0.996
# Accuracy of test data
model_DT.score(X_test,Y_test)
0.98
print('Confusion Matrix')
Confusion_Matrix(Y_test, X_test_predictions_model_DT)
Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, X_test_predictions_model_DT)
cm
array([[1344,   12],
       [  18,  126]])
from sklearn.metrics import classification_report
print(classification_report(Y_test,X_test_predictions_model_DT))
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1356
           1       0.91      0.88      0.89       144

    accuracy                           0.98      1500
   macro avg       0.95      0.93      0.94      1500
weighted avg       0.98      0.98      0.98      1500
print("Roc Auc Score: ", roc_auc_score(Y_test,X_test_predictions_model_DT))
Roc Auc Score: 0.933075221238938
Bayes’ Theorem provides a way that we can calculate the probability of a piece of data belonging to a given class, given our prior knowledge. Bayes’ Theorem is stated as:
P(class|data) = (P(data|class) * P(class)) / P(data)
Where P(class|data) is the probability of class given the provided data.
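As a concrete illustration, the class prior P(class) for this problem can be read straight from the training labels:
# Empirical class priors: roughly 0.904 for class 0 and 0.096 for class 1
Y_train['Personal Loan'].value_counts(normalize=True)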
from sklearn.naive_bayes import GaussianNB
model_NB = GaussianNB()
model_NB.fit(X_train,Y_train)
GaussianNB(priors=None, var_smoothing=1e-09)
X_test_predictions_model_NB = model_NB.predict(X_test)
X_test_predictions_model_NB
array([1, 0, 0, ..., 0, 0, 0])
# Accuracy of train data
model_NB.score(X_train, Y_train)
0.9105714285714286
# Accuracy of test data
model_NB.score(X_test,Y_test)
0.9153333333333333
print('Confusion Matrix')
Confusion_Matrix(Y_test, X_test_predictions_model_NB)
Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, X_test_predictions_model_NB)
cm
array([[1294,   62],
       [  65,   79]])
from sklearn.metrics import classification_report
print(classification_report(Y_test,X_test_predictions_model_NB))
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      1356
           1       0.56      0.55      0.55       144

    accuracy                           0.92      1500
   macro avg       0.76      0.75      0.75      1500
weighted avg       0.91      0.92      0.91      1500
print("Roc Auc Score: ", roc_auc_score(Y_test,X_test_predictions_model_NB))
Roc Auc Score: 0.7514441986234022
In the first step of this project we imported the libraries and the data, then explored the dataset. Key points:
1) The goal is a model that predicts whether a person will take a personal loan.
2) Age and Experience are highly correlated, so we dropped the Experience column.
3) ID and ZIP Code do not help predict loan uptake; ID was dropped (note that the ZIP Code drop above was never persisted, so it remained in the feature set).
4) The Income and CCAvg columns were right-skewed, so we applied a power transformation to normalize them.
5) The Mortgage column was also skewed, but since most of its values are zero we used a binning technique rather than a power transformation.
After this we used several models to make predictions.
1. Logistic Regression
ACCURACY SCORE (test): 95.47%
CONFUSION MATRIX: [[1338, 18], [ 50, 94]]
CLASSIFICATION REPORT: precision recall f1-score support
0 0.96 0.99 0.98 1356
1 0.84 0.65 0.73 144
accuracy 0.95 1500
macro avg 0.90 0.82 0.85 1500
weighted avg 0.95 0.95 0.95 1500
2. Random Forest Classifier
ACCURACY SCORE (test): 98.73%
CONFUSION MATRIX: [[1354, 2], [ 17, 127]]
CLASSIFICATION REPORT: precision recall f1-score support
0 0.99 1.00 0.99 1356
1 0.98 0.88 0.93 144
accuracy 0.99 1500
macro avg 0.98 0.94 0.96 1500
weighted avg 0.99 0.99 0.99 1500
3. Decision Tree Classifier
ACCURACY SCORE (test): 98.00%
CONFUSION MATRIX: [[1344, 12], [ 18, 126]]
CLASSIFICATION REPORT: precision recall f1-score support
0 0.99 0.99 0.99 1356
1 0.91 0.88 0.89 144
accuracy 0.98 1500
macro avg 0.95 0.93 0.94 1500
weighted avg 0.98 0.98 0.98 1500
4. Naive Bayes
ACCURACY SCORE (test): 91.53%
CONFUSION MATRIX: [[1294, 62], [ 65, 79]]
CLASSIFICATION REPORT: precision recall f1-score support
0 0.95 0.95 0.95 1356
1 0.56 0.55 0.55 144
accuracy 0.92 1500
macro avg 0.76 0.75 0.75 1500
weighted avg 0.91 0.92 0.91 1500